This workshop was mainly adapted from the mixOmics tutorials (http://mixomics.org) and the Bioconductor vignette (https://bioconductor.org/packages/release/bioc/html/mixOmics.html).

Suggested answers for the practice sections of the workshop are provided below.

1 Load dataset

library(mixOmics)
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: ggplot2
## 
## Loaded mixOmics 6.24.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us:  citation('mixOmics')
data(breast.TCGA)
# Extract training data and name each data frame
# There are 3 dataframes (3 omics of the same samples), stored as list
# In each dataframe, the rows represent samples.
# Columns represent feature measurments (genes, miRNA, protein)
X <- list(miRNA = breast.TCGA$data.train$mirna, 
          mRNA = breast.TCGA$data.train$mrna, 
          protein = breast.TCGA$data.train$protein)
# Vector of cancer subtype values, the experimental group
Y <- breast.TCGA$data.train$subtype

2 Session 1

2.1 PCA on miRNA and Protein

  • Note that CLR is usually helpful for proportion data (e.g. RNA count data), and won’t work with negative values.
pca.miRNA <- pca(X$miRNA, ncomp=10, center = TRUE, scale = TRUE, logratio = "CLR")
plotIndiv(pca.miRNA, ind.names = FALSE, group = Y, ellipse = TRUE, 
          ellipse.level = 0.6,legend = TRUE, 
          legend.title = "Cancer\nSubtype", title = "PCA of miRNA data")

# removed CLR transformation for the protein dataset
pca.protein <- pca(X$protein, ncomp=10, center = TRUE, scale = TRUE)
plotIndiv(pca.protein, ind.names = FALSE, group = Y, ellipse = TRUE, 
          ellipse.level = 0.6,legend = TRUE, 
          legend.title = "Cancer\nSubtype", title = "PCA of protein data")

# check top loadings on component 1 for miRNA data
plotLoadings(pca.miRNA, comp = 1, ndisplay = 10)

plotLoadings(pca.miRNA, comp = 2, ndisplay = 10)

# check top loadings on component 1/2 for protein data
plotLoadings(pca.protein, comp = 1, ndisplay = 10)

plotLoadings(pca.protein, comp = 2, ndisplay = 10)

plotVar(pca.miRNA, cutoff=0.7)

plotVar(pca.protein, cutoff=0.7)

3 Session 2

3.1 miRNA splsda

  • Perform supervised clustering (sPLSDA) based on the miRNA data layer, to optimally separate the three subtypes.
test.keepX <- round(2^seq(3,6,0.25))
test.keepX
##  [1]  8 10 11 13 16 19 23 27 32 38 45 54 64
set.seed(123)
# Slow step:
tune.splsda.miRNA <- tune.splsda(X$miRNA, Y = Y, ncomp = 4, validation = "Mfold", 
                                folds = 5, test.keepX = test.keepX,
                                dist = "centroids.dist",
                                scale = TRUE, logratio = "CLR", 
                                nrepeat = 60, cpus = 3, progressBar = FALSE, 
                                measure = "BER")
saveRDS(tune.splsda.miRNA, file = "tune.splsda.miRNA.rds")
tune.splsda.miRNA <- readRDS("tune.splsda.miRNA.rds")

plot(tune.splsda.miRNA)

tune.splsda.miRNA$choice.ncomp$ncomp
## [1] 4
tune.splsda.miRNA$choice.keepX
## comp1 comp2 comp3 comp4 
##    54    64    64    54
# Tuning suggests 4 components with 54,64,64,64 features.
# We might prefer a simpler model with 2 componets (54,64) features.

tuned.splsda.miRNA <- splsda(X$miRNA, Y = Y, ncomp = 3, keepX = c(54, 64, 64))

plotIndiv(tuned.splsda.miRNA, title="Tuned sPLSDA miRNA", 
          ind.names = FALSE, ellipse = TRUE, ellipse.level = 0.6, 
          legend.title = "Cancer\nSubtype", legend = TRUE)

plotIndiv(tuned.splsda.miRNA, title="Tuned sPLSDA miRNA", comp = c(1,3),
          ind.names = FALSE, ellipse = TRUE, ellipse.level = 0.6, 
          legend.title = "Cancer\nSubtype", legend = TRUE)

# 3d sample plot (not great for printing, but okay to have a look and consider the contribution of a third component)
# plotIndiv(tuned.splsda.miRNA, title="Tuned sPLSDA miRNA", comp = 1:3, 
#           style = "3d",
#           ind.names = FALSE, ellipse = TRUE, ellipse.level = 0.6, 
#           legend.title = "Cancer\nSubtype", legend = TRUE)

3.2 mRNA splsda Basal vs Other

  • How about if we want to separate the Basal subtype from the other two subtypes, in a binary classification?
Y.basal <- c("Other","Basal")[as.integer(Y=="Basal")+1]
Y.basal
##   [1] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
##  [10] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
##  [19] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
##  [28] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
##  [37] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
##  [46] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
##  [55] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
##  [64] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
##  [73] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
##  [82] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
##  [91] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [100] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [109] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [118] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [127] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [136] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [145] "Other" "Other" "Other" "Other" "Other" "Other"
table(Y.basal)
## Y.basal
## Basal Other 
##    45   105
tune.splsda.mRNA.basal <- tune.splsda(X$mRNA, Y = Y.basal, ncomp = 3, validation = "Mfold", 
                                 folds = 5, test.keepX = test.keepX,
                                 dist = "centroids.dist",
                                 scale = TRUE, logratio = "CLR", 
                                 nrepeat = 60, cpus = 3, progressBar = FALSE, 
                                 measure = "BER")
saveRDS(tune.splsda.mRNA.basal, file = "tune.splsda.mRNA.basal.rds")
tune.splsda.mRNA.basal <- readRDS("tune.splsda.mRNA.basal.rds")

plot(tune.splsda.mRNA.basal)

tune.splsda.mRNA.basal$choice.keepX
## comp1 comp2 comp3 
##    32    54    23
# ^ one component with 27 mRNA features looks great for this.

tuned.splsda.mRNA.basal <- splsda(X$mRNA, Y = Y.basal, ncomp = 1, keepX = 27)
plotLoadings(tuned.splsda.mRNA.basal, contrib = "max", method = "median", 
             size.name = 0.5, size.title = 1,
             title = "Basal vs Other: mRNA signature")

3.3 miRNA prediction

  • There is miRNA data for the independent test samples (breast.TCGA$data.test$mirna), how accurately can your model classify them?
# Prepare test set data
data.test.miRNA <- breast.TCGA$data.test$mirna
Y.test <- breast.TCGA$data.test$subtype
table(Y.test)
## Y.test
## Basal  Her2  LumA 
##    21    14    35
predict.miRNA <- predict(tuned.splsda.miRNA, newdata = data.test.miRNA, 
                        dist = "centroids.dist")
confusion.mat.miRNA <- get.confusion_matrix(truth = Y.test, 
                                           predicted = predict.miRNA$MajorityVote$centroids.dist[,3])
confusion.mat.miRNA
##       predicted.as.Basal predicted.as.Her2 predicted.as.LumA
## Basal                 18                 3                 0
## Her2                   0                13                 1
## LumA                   0                 4                31
get.BER(confusion.mat.miRNA)
## [1] 0.1095238
# 11% balanced error rate on independent cases, 
# using miRNA sPLSDA model (3 components, keepX=c(54,64,64))

# how about a simpler model with 2 components:
tuned.splsda.miRNA.2 <- splsda(X$miRNA, Y = Y, ncomp = 2, keepX = c(54, 64))
predict.miRNA <- predict(tuned.splsda.miRNA.2, newdata = data.test.miRNA, 
                         dist = "centroids.dist")
confusion.mat.miRNA <- get.confusion_matrix(truth = Y.test, 
                                            predicted = predict.miRNA$MajorityVote$centroids.dist[,2])
confusion.mat.miRNA
##       predicted.as.Basal predicted.as.Her2 predicted.as.LumA
## Basal                 17                 4                 0
## Her2                   1                13                 0
## LumA                   0                 3                32
get.BER(confusion.mat.miRNA)
## [1] 0.115873
# 11.5% BER is a worse performance, 
# i.e. 3 miRNA components was better for miRNA predictive value

4 Session 3

4.1 rCCA

  • Perform rCCA using miRNA + mRNA data, including an arrow plot and correlation heatmap.
shrink.rcc.miRNA.mRNA <- rcc(X$miRNA,X$mRNA, method = 'shrinkage') 

# examine the optimal lambda values after shrinkage 
shrink.rcc.miRNA.mRNA$lambda 
##   lambda1   lambda2 
## 0.1098423 0.1210357
plot(shrink.rcc.miRNA.mRNA, type = "barplot", main = "Shrinkage component plot") 

plotArrow(shrink.rcc.miRNA.mRNA, group = Y, 
          col.per.group = color.mixo(1:3),
          title = 'rCCA', legend.title = "Cancer\nSubtype")

plotVar(shrink.rcc.miRNA.mRNA, var.names = c(FALSE, TRUE),
        cex = c(4, 4), cutoff = 0.6, col = c("black", "black"), overlap = FALSE,
        title = 'rCCA miRNA + mRNA')

network(shrink.rcc.miRNA.mRNA, comp = 1, interactive = FALSE,
        lwd.edge = 2,graph.scale = 0.5,
        cutoff = 0.6, color.node = color.mixo(4:5),
        save = "png", name.save = "miRNA.mRNA.network.comp1")
network(shrink.rcc.miRNA.mRNA, comp = 2, interactive = FALSE,
        lwd.edge = 2, graph.scale = 0.3,
        cutoff = 0.45, color.node = color.mixo(4:5),
        save = "png", name.save = "miRNA.mRNA.network.comp2")

cim(shrink.rcc.miRNA.mRNA, cutoff = 0.5, comp = 1:2, 
    xlab = "mRNA", ylab = "miRNA", 
    col.cex = 0.5, row.cex=0.5, margins=c(6,5),
    save = "png", name.save = "rcc.miRNA.mRNA.cim")

4.2 PLS

  • PLS2 analysis of mRNA vs protein data
spls.mRNA.protein <- spls(X = X$mRNA, Y = X$protein, ncomp = 5, mode = 'regression')
perf.spls.mRNA.protein <- perf(spls.mRNA.protein, validation = 'Mfold',
                               folds = 6, nrepeat = 5)
plot(perf.spls.mRNA.protein, criterion = 'Q2.total')

list.keepX <- 2^(3:7)
list.keepY <- c(3:10)
tune.spls.miRNA.RNA <- tune.spls(X$miRNA, X$mRNA, ncomp = 2,
                                 test.keepX = list.keepX,
                                 test.keepY = list.keepY,
                                 nrepeat = 1, folds = 6,
                                 mode = 'regression', measure = 'cor') 
plot(tune.spls.miRNA.RNA)         # use the correlation measure for tuning

optimal.keepX <- tune.spls.miRNA.RNA$choice.keepX
optimal.keepY <- tune.spls.miRNA.RNA$choice.keepY
final.spls.mRNA.protein <- spls(X$mRNA, X$protein, ncomp = 2, 
                                keepX = optimal.keepX,
                                keepY = optimal.keepY,
                                mode = "regression") # explanatory approach being used, 
# hence use regression mode
plotArrow(final.spls.mRNA.protein, rep.space = "XY-variate",
          ind.names = FALSE, group = Y, legend=TRUE, title = "sPLS mRNA+protein",
          legend.title = "Cancer\nSubtype")

color.edge <- color.GreenRed(50)  # set the colours of the connecting lines
network(final.spls.mRNA.protein, comp = 1:2,
        cutoff = 0.5,
        shape.node = c("rectangle", "circle"),
        color.node = color.mixo(5:6),
        size.node = 0.3,
        color.edge = color.edge,
        save = 'png',
        name.save = 'sPLS.mRNA.protein.network')
cim(final.spls.mRNA.protein, comp = 1:2, xlab = "Protein", ylab = "mRNA",
    margins = c(6,8), 
    cutoff = 0.5, save = "png", name.save = "sPLS.mRNA.protein.cim")

4.3 PLS1 analysis of GATA3 vs mRNA

practice.spls.mRNA.GATA3 <- spls(X$mRNA, X$protein[,"GATA3"], ncomp = 1, 
                                 keepX = 8,
                                 keepY = 1,
                                 mode = "canonical")

plotLoadings(practice.spls.mRNA.GATA3, block = "X")

# KDM4B has strong positive association to GATA3, as expected (https://doi.org/10.1093/nar/gkt469)
# CSRP2 has strong negative association to GATA3

selectVar(practice.spls.mRNA.GATA3)$X$name
## [1] "KDM4B"   "CSRP2"   "ZNF552"  "SLC43A3" "PREX1"   "FMNL2"   "FUT8"   
## [8] "LRIG1"
cim(practice.spls.mRNA.GATA3, comp = 1, xlab = "mRNA", ylab = "Samples",
    margins = c(5,4), 
    cutoff = 0, save = "png", name.save = "sPLS.mRNA.GATA3.cim")

5 Session 4

5.1 DIABLO cohort 2

  • How about using the second cohort of samples to form an integrated diablo model, and then classify the samples from the first cohort?
# set "cohort2": n=70 samples with miRNA and mRNA data
X.cohort2 <- list(miRNA = breast.TCGA$data.test$mirna,
                  mRNA = breast.TCGA$data.test$mrna)
Y.cohort2 <- breast.TCGA$data.test$subtype
lapply(X.cohort2, FUN = dim)
## $miRNA
## [1]  70 184
## 
## $mRNA
## [1]  70 200
length(Y.cohort2)
## [1] 70
# design matrix, the relationship between datasets
design.cohort2 <- matrix(0.1, ncol = length(X.cohort2), nrow = length(X.cohort2), 
                         dimnames = list(names(X.cohort2), names(X.cohort2)))
diag(design.cohort2) <- 0
design.cohort2 
##       miRNA mRNA
## miRNA   0.0  0.1
## mRNA    0.1  0.0
diablo.cohort2 <- block.plsda(X.cohort2, 
                              Y.cohort2, 
                              ncomp = 5, 
                              design = design.cohort2)
## Design matrix has changed to include Y; each block will be
##             linked to Y.
set.seed(123) # For reproducibility, remove for your analyses
perf.diablo.cohort2 = perf(diablo.cohort2, validation = 'Mfold',
                           folds = 6, nrepeat = 30, cpus = 3)
plot(perf.diablo.cohort2)

perf.diablo.cohort2$MajorityVote
## list()
ncomp <- perf.diablo.cohort2$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
# result suggests to use 3 components
#ncomp <- 2 # (we could consider choosing a low number of components than suggested)
ncomp
## [1] 3
# testing range, for the number of features to select
test.keepX <- list(miRNA = c(5:7, round(2^(seq(3,5,0.5)))),
                   mRNA = c(5:7, round(2^(seq(3,5,0.5)))))

set.seed(123) # for reproducibility
tune.diablo.cohort2 <- tune.block.splsda(X.cohort2, 
                                         Y.cohort2, ncomp = ncomp,
                                         test.keepX = test.keepX, design = design.cohort2,
                                         validation = 'Mfold', folds = 6, nrepeat = 3,
                                         BPPARAM = BiocParallel::SnowParam(workers = 3),
                                         dist = "centroids.dist")
## Design matrix has changed to include Y; each block will be
##             linked to Y.
## 
## You have provided a sequence of keepX of length: 8 for block miRNA and 8 for block mRNA.
## This results in 64 models being fitted for each component and each nrepeat, this may take some time to run, be patient!
ncomp.cohort2 <- tune.diablo.cohort2$choice.ncomp$ncomp # suggests 3 components per block
keepX.cohort2 <- tune.diablo.cohort2$choice.keepX # suggests keep 5,5,5 and 23,32,11 features

# could also manually input the values (if resuming from this point)
#    opt.ncomp.cohort2 <- 3
#    opt.keepX.cohort2 <- list(miRNA = c(5,5,5),
#                          mRNA = c(23,32,11))


# make a diablo integrated model, sparse feature selection
diablo.cohort2 <- block.splsda(X = X.cohort2, 
                               Y = Y.cohort2, 
                               keepX = keepX.cohort2,
                               ncomp = ncomp.cohort2,
                               design = design.cohort2)
## Design matrix has changed to include Y; each block will be
##             linked to Y.
# obtain the selected features and weights
selectVar(diablo.cohort2, block = 'miRNA', comp = 1)
## $miRNA
## $miRNA$name
## [1] "hsa-mir-500a" "hsa-mir-532"  "hsa-mir-501"  "hsa-mir-505"  "hsa-mir-130b"
## 
## $miRNA$value
##              value.var
## hsa-mir-500a 0.6457262
## hsa-mir-532  0.5547329
## hsa-mir-501  0.3568440
## hsa-mir-505  0.3256011
## hsa-mir-130b 0.2048303
## 
## 
## $comp
## [1] 1
selectVar(diablo.cohort2, block = 'mRNA', comp = 1)
## $mRNA
## $mRNA$name
##  [1] "ZNF552"  "SLC43A3" "LRIG1"   "CCNA2"   "KDM4B"   "PREX1"   "STC2"   
##  [8] "C4orf34" "PLCD4"   "TTC39A"  "KIF13B"  "E2F1"    "NCOA7"   "FAM63A" 
## [15] "HN1"     "PLSCR1"  "HTRA1"   "SLC5A6"  "FMNL2"   "LYN"     "ASPM"   
## [22] "ILDR1"   "IFITM2" 
## 
## $mRNA$value
##           value.var
## ZNF552  -0.50992767
## SLC43A3  0.35895241
## LRIG1   -0.33183812
## CCNA2    0.27389658
## KDM4B   -0.26725842
## PREX1   -0.23349891
## STC2    -0.22267161
## C4orf34 -0.19222356
## PLCD4   -0.19200410
## TTC39A  -0.16507411
## KIF13B  -0.16159680
## E2F1     0.15721232
## NCOA7    0.14641467
## FAM63A  -0.14545800
## HN1      0.12025827
## PLSCR1   0.10814636
## HTRA1   -0.09920337
## SLC5A6   0.09353720
## FMNL2    0.07625932
## LYN      0.05408349
## ASPM     0.03337284
## ILDR1   -0.02768450
## IFITM2  -0.02452071
## 
## 
## $comp
## [1] 1
# if you want to export all selected features to csv:

cohort2.features.list <- list()
for (i in 1:diablo.cohort2$ncomp[1]) {
  cohort2.features.list[[i]] <- data.frame(
    name=c(selectVar(diablo.cohort2, comp=i)$miRNA$name,
           selectVar(diablo.cohort2, comp=i)$mRNA$name),
    value=c(selectVar(diablo.cohort2,comp = i)$miRNA$value$value.var,
            selectVar(diablo.cohort2,comp = i)$mRNA$value$value.var),
    block=c(rep("miRNA", 
                length(selectVar(diablo.cohort2, comp=i)$miRNA$name)),
            rep("mRNA", 
                length(selectVar(diablo.cohort2, comp=i)$mRNA$name))),
    comp=i)
}
cohort2.features.list
## [[1]]
##            name       value block comp
## 1  hsa-mir-500a  0.64572618 miRNA    1
## 2   hsa-mir-532  0.55473287 miRNA    1
## 3   hsa-mir-501  0.35684396 miRNA    1
## 4   hsa-mir-505  0.32560106 miRNA    1
## 5  hsa-mir-130b  0.20483034 miRNA    1
## 6        ZNF552 -0.50992767  mRNA    1
## 7       SLC43A3  0.35895241  mRNA    1
## 8         LRIG1 -0.33183812  mRNA    1
## 9         CCNA2  0.27389658  mRNA    1
## 10        KDM4B -0.26725842  mRNA    1
## 11        PREX1 -0.23349891  mRNA    1
## 12         STC2 -0.22267161  mRNA    1
## 13      C4orf34 -0.19222356  mRNA    1
## 14        PLCD4 -0.19200410  mRNA    1
## 15       TTC39A -0.16507411  mRNA    1
## 16       KIF13B -0.16159680  mRNA    1
## 17         E2F1  0.15721232  mRNA    1
## 18        NCOA7  0.14641467  mRNA    1
## 19       FAM63A -0.14545800  mRNA    1
## 20          HN1  0.12025827  mRNA    1
## 21       PLSCR1  0.10814636  mRNA    1
## 22        HTRA1 -0.09920337  mRNA    1
## 23       SLC5A6  0.09353720  mRNA    1
## 24        FMNL2  0.07625932  mRNA    1
## 25          LYN  0.05408349  mRNA    1
## 26         ASPM  0.03337284  mRNA    1
## 27        ILDR1 -0.02768450  mRNA    1
## 28       IFITM2 -0.02452071  mRNA    1
## 
## [[2]]
##             name        value block comp
## 1    hsa-mir-210  0.691663801 miRNA    2
## 2    hsa-mir-30a -0.658905023 miRNA    2
## 3  hsa-mir-101-2 -0.278946398 miRNA    2
## 4  hsa-mir-30c-2 -0.090035710 miRNA    2
## 5  hsa-mir-101-1 -0.039087532 miRNA    2
## 6          NDRG2 -0.433755926  mRNA    2
## 7          EGLN3  0.377357162  mRNA    2
## 8           SDC1  0.277822044  mRNA    2
## 9         ZNF37B -0.261752330  mRNA    2
## 10      TP53INP2  0.261348927  mRNA    2
## 11         ALCAM  0.220768800  mRNA    2
## 12          WWC2  0.204915471  mRNA    2
## 13         ICAM2 -0.204456064  mRNA    2
## 14         ACADS -0.201529580  mRNA    2
## 15       C4orf34  0.201490928  mRNA    2
## 16        STAT5A -0.201327875  mRNA    2
## 17          ASPH  0.173648063  mRNA    2
## 18          TOB1  0.167600361  mRNA    2
## 19           BOC -0.162058854  mRNA    2
## 20           DBP -0.152383311  mRNA    2
## 21       PLEKHA4 -0.144868552  mRNA    2
## 22        TRIM45 -0.138129580  mRNA    2
## 23         CDK18 -0.107049682  mRNA    2
## 24          FZD4  0.106972519  mRNA    2
## 25         TIGD5 -0.083797631  mRNA    2
## 26           NES -0.074160168  mRNA    2
## 27         IL1R1  0.065884648  mRNA    2
## 28         PSIP1 -0.064468692  mRNA    2
## 29        CPPED1  0.055352187  mRNA    2
## 30         EPHB3 -0.051344666  mRNA    2
## 31         CPT1A  0.039648852  mRNA    2
## 32        FAM63A -0.032676916  mRNA    2
## 33          MTL5 -0.025132140  mRNA    2
## 34         CDCP1  0.015348090  mRNA    2
## 35         S1PR3  0.011992026  mRNA    2
## 36      FLJ23867 -0.009054893  mRNA    2
## 37         YPEL2  0.005398189  mRNA    2
## 
## [[3]]
##              name       value block comp
## 1  hsa-mir-199a-1  0.62323146 miRNA    3
## 2  hsa-mir-199a-2  0.56554380 miRNA    3
## 3    hsa-mir-199b  0.46810016 miRNA    3
## 4    hsa-mir-374a -0.26534076 miRNA    3
## 5     hsa-mir-455  0.04710928 miRNA    3
## 6           POSTN  0.62334841  mRNA    3
## 7            FUT8  0.56936573  mRNA    3
## 8           FRMD6  0.38499141  mRNA    3
## 9           AKAP9  0.21348425  mRNA    3
## 10           TCF4  0.20536352  mRNA    3
## 11           DSG2  0.12269013  mRNA    3
## 12         CCDC80  0.11720266  mRNA    3
## 13           DNLZ -0.10115086  mRNA    3
## 14         COL6A1  0.09396975  mRNA    3
## 15        C7orf55 -0.05067405  mRNA    3
## 16           ZEB1  0.02953547  mRNA    3
cohort2.features.dataframe <- rbind(cohort2.features.list[[1]],
                                    cohort2.features.list[[2]],
                                    cohort2.features.list[[3]])
write.csv(cohort2.features.dataframe, 
          file = "cohort2.diablo.features.csv")



# diablo plots
plotDiablo(diablo.cohort2, ncomp = 1)

plotDiablo(diablo.cohort2, ncomp = 2)

# sample plots
plotIndiv(diablo.cohort2, ind.names = FALSE, legend = TRUE, comp=1:2,
          title = 'Cohort2, DIABLO comp 1 - 2')

# arrow plots
plotArrow(diablo.cohort2, ind.names = FALSE, legend = TRUE, 
          legend.title = "Cancer\nSubtype", comp=1:2, 
          title = "Cohort2, DIABLO comp 1 - 2")

# prepare test data (cohort 1)
data.test.cohort1 <- list(miRNA = breast.TCGA$data.train$mirna, 
                          mRNA = breast.TCGA$data.train$mrna)

# predict the class of cohort1 samples, using the DIABLO model of cohort2
predict.cohort1 <- predict(diablo.cohort2, newdata = data.test.cohort1, 
                           dist = "centroids.dist")

table(predict.cohort1$WeightedVote$centroids.dist[,3])
## 
## Basal  Her2  LumA 
##    45    28    77
confusion.mat.cohort1 <- get.confusion_matrix(truth = breast.TCGA$data.train$subtype, 
                                              predicted = predict.cohort1$WeightedVote$centroids.dist[,3])
confusion.mat.cohort1
##       predicted.as.Basal predicted.as.Her2 predicted.as.LumA
## Basal                 43                 2                 0
## Her2                   2                24                 4
## LumA                   0                 2                73
get.BER(confusion.mat.cohort1)
## [1] 0.09037037